// Top10 holder ownership and mutual fund ownership data 
// Loading Shares Outstanding
// Reads the capital-change file, builds a numeric date from the string
// shrchgdt, fills the gaps between change dates for every stock, and carries
// the last known share counts forward. Result is stored in tempfile `shrout'.
import delimited "../src/TRD_Capchg.csv", clear

keep stkcd shrchgdt nshrttl nshra nshrb nshrh
rename stkcd code

// Generated Date
// Two layouts are handled: 10-character strings with a separator after the
// year and month (month at 6-7, day at 9-10) and 8-character strings without
// separators (month at 5-6, day at 7-8).
generate y = substr(shrchgdt, 1, 4)
generate m = cond(length(shrchgdt) == 8, substr(shrchgdt, 5, 2), substr(shrchgdt, 6, 2))
generate d = cond(length(shrchgdt) == 8, substr(shrchgdt, 7, 2), substr(shrchgdt, 9, 2))

destring y m d, replace
generate date = mdy(m, d, y)

// Declare the panel and insert a row for every missing date within each code
tsset code date
tsfill

// Rows created by tsfill have missing share counts; take the previous
// period's value so each date carries the most recent figure.
foreach s of varlist nshrttl nshra nshrb nshrh {
	replace `s' = l1.`s' if `s' == .
}

rename (nshrttl nshra nshrb nshrh) (shrout_all shrout_a shrout_b shrout_h)

keep code date shrout_*
tempfile shrout
save `shrout'

// Loading Holding file
// The export is split across two .csv files (1,000,000 obs in the first).
// The first part is parked in a tempfile, the second part is loaded, and the
// first part is appended underneath it.

import delimited "../src/HLD_Negshr.csv", clear
tempfile holdings_part1
save `holdings_part1', replace

import delimited "../src/HLD_Negshr1.csv", clear
// force: append even if a variable is string in one part and numeric in the other
append using `holdings_part1', force


// Build a numeric report date from the string reptdt.
gen y=substr(reptdt, 1, 4)
gen m=substr(reptdt,6,2)
gen d=substr(reptdt,9,2)

// 8-character dates without separators: same handling the shares-outstanding
// section of this file applies to shrchgdt.
replace m=substr(reptdt,5,2) if length(reptdt)==8
replace d=substr(reptdt,7,2) if length(reptdt)==8

destring y m d, replace
gen date=mdy(m,d,y)

ren s0401a holder_name
ren s0402a shares
ren s0403a share_type
ren s0404a holder_rank

keep stkcd holder_name shares share_type holder_rank date y m


// Collapsing to Stock-Code Level
// Sum shares over all holder rows of a stock on a report date.
// `date' must be a by-variable: collapse drops every variable that is neither
// summarised nor listed in by(), and the merge that follows keys on code and
// date. `d' was removed by the keep above, so it cannot be used here.
// y and m are functions of date, so listing them does not change the grouping;
// they are retained because later steps use them.
collapse (sum) shares , by(stkcd date y m)
ren stkcd code

// Attach shares outstanding to each stock/report-date row. Rows that exist
// only in the shares-outstanding file are not kept.
merge 1:1 code date using `shrout', keep(master match) nogenerate

// Report dates without a shares-outstanding match inherit the value from the
// previous row of the same stock.
sort code date
foreach s in all a b h {
	by code: replace shrout_`s' = shrout_`s'[_n-1] if shrout_`s' == .
}

generate top10_ownership_total = shares / shrout_all
rename shares top10_shares

// Attach mutual fund holdings by stock and year-month; rows that exist only
// in the mutual fund file are not kept.
merge 1:1 code y m using  ../src/mf_shares.dta, keep(master match)

rename shares mf_shares
// No mutual fund record for this stock-month: treat as zero holdings
replace mf_shares = 0 if _merge == 1 & mf_shares == .
drop _merge breadth marketvalue

generate mf_ownership_total = mf_shares / shrout_all

// Restrict the sample to 2009-2016
keep if inrange(y, 2009, 2016)

label variable top10_ownership_total "shares held by top 10 shareholders, scaled by all outstanding shares"
label variable mf_ownership_total "shares held by mutual funds, scaled by all outstanding shares"

keep code date top10_ownership_total mf_ownership_total

// Stacking Data for Analysis
// date2 is the monthly date of the report date; it is the key used to pull
// `vintage' from the monthly stock file. Only matched rows are kept.
generate date2 = mofd(date)
format %td date
format %tm date2
merge 1:1 code date2 using ../dta/stock_monthly_vintage.dta, keepusing(vintage) keep(match)

* Creating Clean Stacked Datasets
* One copy of the panel is written per cohort (2, 3, 4). Each copy holds the
* rows whose vintage equals the cohort number plus rows with missing vintage,
* and measures time in quarters relative to that cohort's target date.
local target2 "2013-01-25"
local target3 "2013-09-06"
local target4 "2014-09-12"

forvalues v = 2/4 {
	preserve
		keep if vintage == . | vintage == `v'
		generate cohort = `v'
		* Quarter of the observation minus quarter of the cohort's target date
		generate quarters_until_target = qofd(dofm(date2)) - qofd(date("`target`v''", "YMD"))
		tempfile stack`v'
		save `stack`v''
	restore
}

* Appending Clean Stacked Datasets
clear
forvalues v = 2/4 {
	append using `stack`v''
}

save ../dta/io_stacked.dta, replace
